bitkeeper revision 1.179.1.1 (3e9ee075wJmtFBkJEk-QAC5VB7htXg)

author kaf24@scramble.cl.cam.ac.uk <kaf24@scramble.cl.cam.ac.uk>

Thu, 17 Apr 2003 17:12:21 +0000 (17:12 +0000)

committer kaf24@scramble.cl.cam.ac.uk <kaf24@scramble.cl.cam.ac.uk>

Thu, 17 Apr 2003 17:12:21 +0000 (17:12 +0000)
author kaf24@scramble.cl.cam.ac.uk <kaf24@scramble.cl.cam.ac.uk>
Thu, 17 Apr 2003 17:12:21 +0000 (17:12 +0000)
committer kaf24@scramble.cl.cam.ac.uk <kaf24@scramble.cl.cam.ac.uk>
Thu, 17 Apr 2003 17:12:21 +0000 (17:12 +0000)
diff --git a/xen/TODO b/xen/TODO

index 9f834dd9580791c199b1913dfdf8f74d2b915b95..e5df50ac5b3e0fc49ba89a78e3be7264fb175286 100644 (file)
--- a/xen/TODO
+++ b/xen/TODO
@@ -7,20 +7,7 @@ longer-term goals.
   -- Keir (16/3/03)
  
  
-1. FIX HANDLING OF NETWORK RINGS
---------------------------------
-Handling of the transmit rings is currently very broken (for example,
-sending an inter-domain packet will wedge the hypervisor). This is
-because we may handle packets out of order (eg. inter-domain packets
-are handled eagerly, while packets for real interfaces are queued),
-but our current ring design really assumes in-order handling.
-
-A neat fix will be to allow responses to be queued in a different
-order to requests, just as we already do with block-device
-rings. We'll need to add an opaque identifier to ring entries,
-allowing matching of requests and responses, but that's about it.
-
-2. ACCURATE TIMERS AND WALL-CLOCK TIME
+1. ACCURATE TIMERS AND WALL-CLOCK TIME
  --------------------------------------
  Currently our long-term timebase free runs on CPU0, with no external
  calibration. We should run ntpd on domain 0 and allow this to warp
@@ -28,7 +15,7 @@ Xen's timebase. Once this is done, we can have a timebase per CPU and
  not worry about relative drift (since they'll all get sync'ed
  periodically by ntp).
  
-3. ASSIGNING DOMAINS TO PROCESSORS
+2. ASSIGNING DOMAINS TO PROCESSORS
  ----------------------------------
  More intelligent assignment of domains to processors. In
  particular, we don't play well with hyperthreading: we will assign
@@ -40,17 +27,17 @@ relationships between processors in the system (eg. which ones are
  siblings in the same package). We then use this to balance domains
  across packages, and across virtual processors within a package.
  
-4. PROPER DESTRUCTION OF DOMAINS
---------------------------------
-Currently we do not free resources when destroying a domain. This is
-because they may be tied up in subsystems, and there is no way of
-pulling them back in a safe manner.
+3. DOMAIN 0 MANAGEMENT DAEMON
+-----------------------------
+A better control daemon is required for domain 0, which keeps proper
+track of machine resources and can make sensible policy choices. This
+may require support in Xen; for example, notifications (eg. DOMn is
+killed), and requests (eg. can DOMn allocate x frames of memory?).
  
-The fix is probably to reference count resources and automatically
-free them when the count reaches zero. We may get away with one count
-per domain (for all its resources). When this reaches zero we know it
-is safe to free everything: block-device rings, network rings, and all
-the rest.
+4. SANE NETWORK ROUTING
+-----------------------
+The current virtual firewall/router is completely broken. Needs a new
+design and implementation!
  
  5. NETWORK CHECKSUM OFFLOAD 
  --------------------------- 
@@ -60,14 +47,7 @@ indicate, on transmit, which packets need the checksum added and, on
  receive, which packets have been checked out as okay. We can steal
  Linux's interface, which is entirely sane given NIC limitations.
  
-6. DOMAIN 0 MANAGEMENT DAEMON
------------------------------
-A better control daemon is required for domain 0, which keeps proper
-track of machine resources and can make sensible policy choices. This
-may require support in Xen; for example, notifications (eg. DOMn is
-killed), and requests (eg. can DOMn allocate x frames of memory?).
-
-7. MODULE SUPPORT FOR XEN
+6. MODULE SUPPORT FOR XEN
  -------------------------
  Network and blkdev drivers are bloating Xen. At some point we want to
  build drivers as modules, stick them in a cheesy ramfs, then relocate
@@ -79,7 +59,7 @@ which drivers to load.
  Most of the hard stuff (relocating and the like) is done for us by
  Linux's module system.
  
-8. NEW DESIGN FEATURES
+7. NEW DESIGN FEATURES
  ----------------------
  This includes the last-chance page cache, and the unified buffer cache.
  
diff --git a/xen/common/network.c b/xen/common/network.c

index 9e1bf7a00bed5c42aea202bf18a6e27a9c9694b0..2cdf11a9a595bd487f7b3d51d4498d6292632f52 100644 (file)
--- a/xen/common/network.c
+++ b/xen/common/network.c
@@ -5,7 +5,7 @@
   * with the virtual interfaces (vifs) and the virtual firewall/router through
   * the use of rules.
   *
- * Copyright (c) 2002, A K Warfield and K A Fraser
+ * Copyright (c) 2002-2003, A K Warfield and K A Fraser
   */
  
  #include <hypervisor-ifs/network.h>
@@ -67,7 +67,8 @@ net_vif_t *create_net_vif(int domain)
  
      shadow_ring = kmalloc(sizeof(net_shadow_ring_t), GFP_KERNEL);
      if ( shadow_ring == NULL ) goto fail;
-    
+    memset(shadow_ring, 0, sizeof(*shadow_ring));
+
      shadow_ring->rx_ring = kmalloc(RX_RING_SIZE
                      * sizeof(rx_shadow_entry_t), GFP_KERNEL);
      shadow_ring->tx_ring = kmalloc(TX_RING_SIZE
@@ -75,9 +76,6 @@ net_vif_t *create_net_vif(int domain)
      if ( (shadow_ring->rx_ring == NULL) || (shadow_ring->tx_ring == NULL) )
              goto fail;
  
-    shadow_ring->rx_prod = shadow_ring->rx_cons = shadow_ring->rx_idx = 0;
-    shadow_ring->tx_prod = shadow_ring->tx_cons = shadow_ring->tx_idx = 0;
-    
      /*
       * Fill in the new vif struct. Note that, while the vif's refcnt is
       * non-zero, we hold a reference to the task structure.
@@ -121,7 +119,7 @@ void destroy_net_vif(net_vif_t *vif)
      /* Return any outstanding receive buffers to the guest OS. */
      spin_lock_irqsave(&p->page_lock, flags);
      for ( i  = vif->shadow_ring->rx_idx; 
-          i != vif->shadow_ring->rx_prod; 
+          i != vif->shadow_ring->rx_req_cons;
            i  = ((i+1) & (RX_RING_SIZE-1)) )
      {
          rx_shadow_entry_t *rx = vif->shadow_ring->rx_ring + i;
@@ -263,7 +261,7 @@ void add_default_net_rule(int vif_id, u32 ipaddr)
      memset(&new_rule, 0, sizeof(net_rule_t));
      new_rule.dst_addr = ipaddr;
      new_rule.dst_addr_mask = 0xffffffff;
-    new_rule.src_interface = VIF_PHYSICAL_INTERFACE;
+    new_rule.src_interface = VIF_ANY_INTERFACE;
      new_rule.dst_interface = vif_id;
      new_rule.action = NETWORK_ACTION_ACCEPT;
      new_rule.proto = NETWORK_PROTO_ANY;
@@ -319,9 +317,8 @@ void print_net_rule_list()
   * Apply the rules to this skbuff and return the vif id that it is bound for.
   * If there is no match, VIF_DROP is returned.
   */
-
-int net_find_rule(u8 nproto, u8 tproto, u32 src_addr, u32 dst_addr, u16 src_port, u16 dst_port, 
-                  int src_vif)
+int net_find_rule(u8 nproto, u8 tproto, u32 src_addr, u32 dst_addr, 
+                  u16 src_port, u16 dst_port, int src_vif)
  {
      net_rule_ent_t *ent;
      int dest = VIF_DROP;
@@ -330,7 +327,7 @@ int net_find_rule(u8 nproto, u8 tproto, u32 src_addr, u32 dst_addr, u16 src_port
      
      ent = net_rule_list;
      
-    while (ent)
+    while ( ent != NULL )
      {
          if ( ((ent->r.src_interface == src_vif)
                || (ent->r.src_interface == VIF_ANY_INTERFACE)) &&
@@ -351,12 +348,19 @@ int net_find_rule(u8 nproto, u8 tproto, u32 src_addr, u32 dst_addr, u16 src_port
                 (tproto == IPPROTO_UDP)))
             )
          {
-            break;
+            /*
+             * XXX FFS! We keep going to find the "best" rule. Where best 
+             * corresponds to vaguely sane routing of a packet. We need a less 
+             * shafted model for our "virtual firewall/router" methinks!
+             */
+            if ( dest < 0 )
+                dest = ent->r.dst_interface;
+            if ( dest >= 0 )
+                break;
          }
          ent = ent->next;
      }
  
-    if (ent) (dest = ent->r.dst_interface);
      read_unlock(&net_rule_lock);
      return dest;
  }
@@ -423,6 +427,7 @@ int __net_get_target_vif(u8 *data, unsigned int len, int src_vif)
      return target;
      
   drop:
+    printk("VIF%d: pkt to drop!\n", src_vif);
      return VIF_DROP;
  }
  
diff --git a/xen/include/hypervisor-ifs/network.h b/xen/include/hypervisor-ifs/network.h

index 56a8f928816e256ae57a555616b06b46fb585a4c..4d4cfe93e6ed78582c003a68c0c3fa8d6cca4b67 100644 (file)
--- a/xen/include/hypervisor-ifs/network.h
+++ b/xen/include/hypervisor-ifs/network.h
@@ -14,50 +14,70 @@
  
  #include <linux/types.h>
  
-typedef struct tx_entry_st {
-    unsigned long  addr;   /* machine address of packet (IN VAR) */
-    unsigned short size;   /* in bytes (IN VAR) */
-    unsigned char  status; /* per descriptor status (OUT VAR) */
-    unsigned char  _unused;
+
+typedef struct tx_req_entry_st
+{
+    unsigned long  id;
+    unsigned long  addr;   /* machine address of packet */
+    unsigned short size;   /* packet size in bytes */
+} tx_req_entry_t;
+
+typedef struct tx_resp_entry_st
+{
+    unsigned long  id;
+    unsigned char  status;
+} tx_resp_entry_t;
+
+typedef union tx_entry_st
+{
+    tx_req_entry_t  req;
+    tx_resp_entry_t resp;
  } tx_entry_t;
  
-typedef struct rx_entry_st {
-    unsigned long  addr;   /* machine address of PTE to swizzle (IN VAR) */
-    unsigned short size;   /* in bytes (OUT VAR) */
-    unsigned char  status; /* per descriptor status (OUT VAR) */
-    unsigned char  offset; /* offset in page of received pkt (OUT VAR) */
+
+typedef struct rx_req_entry_st
+{
+    unsigned long  id;
+    unsigned long  addr;   /* machine address of PTE to swizzle */
+} rx_req_entry_t;
+
+typedef struct rx_resp_entry_st
+{
+    unsigned long  id;
+    unsigned short size;   /* received packet size in bytes */
+    unsigned char  status; /* per descriptor status */
+    unsigned char  offset; /* offset in page of received pkt */
+} rx_resp_entry_t;
+
+typedef union rx_entry_st
+{
+    rx_req_entry_t  req;
+    rx_resp_entry_t resp;
  } rx_entry_t;
  
+
  #define TX_RING_SIZE 256
  #define RX_RING_SIZE 256
-typedef struct net_ring_st {
+
+typedef struct net_ring_st
+{
      /*
-     * Guest OS places packets into ring at tx_prod.
-     * Hypervisor removes at tx_cons.
-     * Ring is empty when tx_prod == tx_cons.
-     * Guest OS receives a DOMAIN_EVENT_NET_TX when tx_cons passes tx_event.
-     * Hypervisor may be prodded whenever tx_prod is updated, but this is
-     * only necessary when tx_cons == old_tx_prod (ie. transmitter stalled).
+     * Guest OS places packets into ring at tx_req_prod.
+     * Guest OS receives DOMAIN_EVENT_NET_TX when tx_resp_prod passes tx_event.
       */
      tx_entry_t *tx_ring;
-    unsigned int tx_prod, tx_cons, tx_event;
+    unsigned int tx_req_prod, tx_resp_prod, tx_event;
  
      /*
-     * Guest OS places empty buffers into ring at rx_prod.
-     * Hypervisor fills buffers as rx_cons.
-     * Ring is empty when rx_prod == rx_cons.
-     * Guest OS receives a DOMAIN_EVENT_NET_RX when rx_cons passes rx_event.
-     * Hypervisor may be prodded whenever rx_prod is updated, but this is
-     * only necessary when rx_cons == old_rx_prod (ie. receiver stalled).
+     * Guest OS places empty buffers into ring at rx_req_prod.
+     * Guest OS receives DOMAIN_EVENT_NET_RX when rx_resp_prod passes rx_event.
       */
      rx_entry_t *rx_ring;
-    unsigned int rx_prod, rx_cons, rx_event;
+    unsigned int rx_req_prod, rx_resp_prod, rx_event;
  } net_ring_t;
  
-/* Specify base of per-domain array. Get returned free slot in the array. */
-/*net_ring_t *create_net_vif(int domain);*/
-
-/* Packet routing/filtering code follows:
+/*
+ * Packet routing/filtering code follows:
   */
  
  #define NETWORK_ACTION_ACCEPT   0
@@ -89,7 +109,7 @@ typedef struct net_rule_st
  typedef struct vif_query_st
  {
      unsigned int    domain;
-    char            *buf;   // where to put the reply -- guest virtual address
+    char            *buf;   /* reply buffer -- guest virtual address */
  } vif_query_t;
  
  /* Network trap operations and associated structure. 
diff --git a/xen/include/xeno/sched.h b/xen/include/xeno/sched.h

index 5534ee45f364f80c25f98c85259c44ed7203fa05..3bc997bed33013ce2ed56ca9546f98e1b5700723 100644 (file)
--- a/xen/include/xeno/sched.h
+++ b/xen/include/xeno/sched.h
@@ -50,7 +50,7 @@ extern struct mm_struct init_mm;
  }
  
  #define _HYP_EVENT_NEED_RESCHED 0
-#define _HYP_EVENT_NET_RX       1
+#define _HYP_EVENT_NET          1
  #define _HYP_EVENT_DIE          2
  
  #define PF_DONEFPUINIT  0x1  /* Has the FPU been initialised for this task? */
diff --git a/xen/include/xeno/vif.h b/xen/include/xeno/vif.h

index 22c6c25392c2bf3bebf3af0f5c4a7211a4cd6300..730c1cb084fec87b550ebb4058f98c63c1dca9b5 100644 (file)
--- a/xen/include/xeno/vif.h
+++ b/xen/include/xeno/vif.h
@@ -3,7 +3,7 @@
   * This is the hypervisor end of the network code.  The net_ring structure
   * stored in each vif is placed on a shared page to interact with the guest VM.
   *
- * Copyright (c) 2002, A K Warfield and K A Fraser
+ * Copyright (c) 2002-2003, A K Warfield and K A Fraser
   */
  
  /* virtual network interface struct and associated defines. */
@@ -25,45 +25,51 @@
   * TX_RING_SIZE and RX_RING_SIZE are defined in the shared network.h.
   */
  
-typedef struct rx_shadow_entry_st {
+typedef struct rx_shadow_entry_st 
+{
+    unsigned long  id;
+    /* IN vars */
      unsigned long  addr;
+    /* OUT vars */
      unsigned short size;
      unsigned char  status;
      unsigned char  offset;
+    /* PRIVATE vars */
      unsigned long  flush_count;
  } rx_shadow_entry_t;
  
-typedef struct tx_shadow_entry_st {
+typedef struct tx_shadow_entry_st 
+{
+    unsigned long  id;
+    /* IN vars */
      void          *header;
      unsigned long  payload;
      unsigned short size;
+    /* OUT vars */
      unsigned char  status;
-    unsigned char  _unused;
  } tx_shadow_entry_t;
  
  typedef struct net_shadow_ring_st {
      rx_shadow_entry_t *rx_ring;
-    tx_shadow_entry_t *tx_ring;
-
-    /*
-     * Private copy of producer. Follows guest OS version, but never
-     * catches up with our consumer index.
-     */
-    unsigned int rx_prod;
-    /* Points at next buffer to be filled by NIC. Chases rx_prod. */
-    unsigned int rx_idx;
-    /* Points at next buffer to be returned to the guest OS. Chases rx_idx. */
-    unsigned int rx_cons;
+    unsigned int rx_prod;  /* More buffers for filling go here. */
+    unsigned int rx_idx;   /* Next buffer to fill is here. */
+    unsigned int rx_cons;  /* Next buffer to create response for is here. */
  
+    tx_shadow_entry_t *tx_ring;
      /*
-     * Private copy of producer. Follows guest OS version, but never
-     * catches up with our consumer index.
+     * These cannot be derived from shared variables, as not all packets
+     * will end up on the shadow ring (eg. locally delivered packets).
       */
-    unsigned int tx_prod;
-    /* Points at next buffer to be scheduled. Chases tx_prod. */
-    unsigned int tx_idx;
-    /* Points at next buffer to be returned to the guest OS. Chases tx_idx. */
-    unsigned int tx_cons;
+    unsigned int tx_prod;  /* More packets for sending go here. */
+    unsigned int tx_idx;   /* Next packet to send is here. */
+    unsigned int tx_transmitted_prod; /* Next packet to finish transmission. */
+    unsigned int tx_cons;  /* Next packet to create response for is here. */
+
+    /* Indexes into shared ring. */
+    unsigned int rx_req_cons;
+    unsigned int rx_resp_prod; /* private version of shared variable */
+    unsigned int tx_req_cons;
+    unsigned int tx_resp_prod; /* private version of shared variable */
  } net_shadow_ring_t;
  
  typedef struct net_vif_st {
diff --git a/xen/net/dev.c b/xen/net/dev.c

index cd9c2d18ee175c687bc92f870eb7d117363d5fd7..7fbf165b0e759c21d7c8e01959667fd649f0b1e2 100644 (file)
--- a/xen/net/dev.c
+++ b/xen/net/dev.c
@@ -49,6 +49,15 @@
  #define TX_RING_ADD(_i,_j) (((_i)+(_j)) & (TX_RING_SIZE-1))
  #define RX_RING_ADD(_i,_j) (((_i)+(_j)) & (RX_RING_SIZE-1))
  
+static void make_tx_response(net_vif_t *vif, 
+                             unsigned long id, 
+                             unsigned char st);
+static void make_rx_response(net_vif_t     *vif, 
+                             unsigned long  id, 
+                             unsigned short size,
+                             unsigned char  st,
+                             unsigned char  off);
+
  struct net_device *the_dev = NULL;
  
  /*
@@ -482,6 +491,49 @@ illegal_highdma(struct net_device *dev, struct sk_buff *skb)
  
  struct netif_rx_stats netdev_rx_stat[NR_CPUS];
  
+/*
+ * update_shared_ring(void)
+ * 
+ * This replaces flush_rx_queue as the guest event handler to move packets
+ * queued in the guest ring up to the guest.  Really, the packet is already
+ * there, it was page flipped in deliver_packet, but this moves the ring
+ * descriptor across from the shadow ring and increments the pointers.
+ *
+ * Clears _HYP_EVENT_NET for the current task, then walks every vif on
+ * current->net_vifs and issues one response per completed shadow entry:
+ *   - receive:  entries from rx_cons up to (not including) rx_idx;
+ *   - transmit: entries from tx_cons up to (not including)
+ *               tx_transmitted_prod.
+ */
+void update_shared_ring(void)
+{
+    rx_shadow_entry_t *rx;
+    tx_shadow_entry_t *tx;
+    net_ring_t *net_ring;
+    net_shadow_ring_t *shadow_ring;
+    net_vif_t *vif;
+    struct list_head *ent;
+
+    /* Clear the pending flag before draining the rings. */
+    clear_bit(_HYP_EVENT_NET, &current->hyp_events);
+
+    list_for_each(ent, &current->net_vifs)
+    {
+        vif = list_entry(ent, net_vif_t, dom_list);
+        net_ring    = vif->net_ring;
+        shadow_ring = vif->shadow_ring;
+
+        /* Receive side: one response per filled buffer. */
+        while ( shadow_ring->rx_cons != shadow_ring->rx_idx )
+        {
+            rx = shadow_ring->rx_ring + shadow_ring->rx_cons;
+            /*
+             * Flush the local TLB if this CPU's flush counter still equals
+             * the value stored in the entry.
+             * NOTE(review): presumably flush_count is recorded when the page
+             * is flipped in deliver_packet -- confirm against that code.
+             */
+            if ( rx->flush_count == tlb_flush_count[smp_processor_id()] )
+                __flush_tlb();
+            /* Advance the private consumer index, then emit the response. */
+            shadow_ring->rx_cons = RX_RING_INC(shadow_ring->rx_cons);
+            make_rx_response(vif, rx->id, rx->size, rx->status, rx->offset);
+        }
+
+        /* Transmit side: one response per packet that finished sending. */
+        while ( shadow_ring->tx_cons != shadow_ring->tx_transmitted_prod )
+        {
+            tx = shadow_ring->tx_ring + shadow_ring->tx_cons;
+            /*
+             * tx_cons indexes the transmit ring, so it must wrap with the
+             * TX macro; using the RX one only works while both ring sizes
+             * happen to be equal.
+             */
+            shadow_ring->tx_cons = TX_RING_INC(shadow_ring->tx_cons);
+            make_tx_response(vif, tx->id, tx->status);
+        }
+    }
+}
+
  void deliver_packet(struct sk_buff *skb, net_vif_t *vif)
  {
      net_shadow_ring_t *shadow_ring;
@@ -489,7 +541,6 @@ void deliver_packet(struct sk_buff *skb, net_vif_t *vif)
      unsigned long *g_pte; 
      struct pfn_info *g_pfn, *h_pfn;
      unsigned int i; 
-    unsigned long flags;
  
      memset(skb->mac.ethernet->h_dest, 0, ETH_ALEN);
      if ( ntohs(skb->mac.ethernet->h_proto) == ETH_P_ARP )
@@ -501,17 +552,13 @@ void deliver_packet(struct sk_buff *skb, net_vif_t *vif)
  
      rx = shadow_ring->rx_ring + i;
  
-    if ( rx->status != RING_STATUS_OK )
-    {
-        DPRINTK("Bad buffer in deliver_packet()\n");
-        goto inc_and_out;
-    }
-
+    ASSERT(rx->status == RING_STATUS_OK);
      ASSERT(skb->len <= PAGE_SIZE);
+
      rx->size   = skb->len;
      rx->offset = (unsigned char)((unsigned long)skb->data & ~PAGE_MASK);
  
-    spin_lock_irqsave(&vif->domain->page_lock, flags);
+    spin_lock(&vif->domain->page_lock);
  
      g_pte = map_domain_mem(rx->addr);
  
@@ -541,12 +588,11 @@ void deliver_packet(struct sk_buff *skb, net_vif_t *vif)
      list_del(&g_pfn->list);
      list_add(&h_pfn->list, &vif->domain->pg_head);
  
-    spin_unlock_irqrestore(&vif->domain->page_lock, flags);
+    spin_unlock(&vif->domain->page_lock);
      
      /* Our skbuff now points at the guest's old frame. */
      skb->pf = g_pfn;
  
- inc_and_out:        
      smp_wmb(); /* updates must happen before releasing the descriptor. */
      shadow_ring->rx_idx = RX_RING_INC(i);
  }
@@ -595,11 +641,11 @@ int netif_rx(struct sk_buff *skb)
      if ( skb->dst_vif == VIF_UNKNOWN_INTERFACE )
          skb->dst_vif = __net_get_target_vif(skb->data, skb->len, skb->src_vif);
          
-    read_lock_irqsave(&sys_vif_lock, flags);
+    read_lock(&sys_vif_lock);
      if ( (skb->dst_vif <= VIF_PHYSICAL_INTERFACE) ||
           ((vif = sys_vif_list[skb->dst_vif]) == NULL) )
      {
-        read_unlock_irqrestore(&sys_vif_lock, flags);
+        read_unlock(&sys_vif_lock);
          netdev_rx_stat[this_cpu].dropped++;
          unmap_domain_mem(skb->head);
          kfree_skb(skb);
@@ -608,10 +654,10 @@ int netif_rx(struct sk_buff *skb)
      }
  
      get_vif(vif);
-    read_unlock_irqrestore(&sys_vif_lock, flags);
+    read_unlock(&sys_vif_lock);
  
      deliver_packet(skb, vif);
-    cpu_mask = mark_hyp_event(vif->domain, _HYP_EVENT_NET_RX);
+    cpu_mask = mark_hyp_event(vif->domain, _HYP_EVENT_NET);
      put_vif(vif);
  
      unmap_domain_mem(skb->head);
@@ -676,10 +722,8 @@ static void add_to_net_schedule_list_tail(net_vif_t *vif)
  /* Destructor function for tx skbs. */
  static void tx_skb_release(struct sk_buff *skb)
  {
-    int i, send = 0;
+    int i;
      net_vif_t *vif = sys_vif_list[skb->src_vif];
-    unsigned int idx;
-    tx_shadow_entry_t *tx;
      unsigned long cpu_mask, flags;
      
      spin_lock_irqsave(&vif->domain->page_lock, flags);
@@ -692,51 +736,10 @@ static void tx_skb_release(struct sk_buff *skb)
  
      skb_shinfo(skb)->nr_frags = 0; 
  
-    /* This would mean that the guest OS has fiddled with our index. */
-    if ( vif->shadow_ring->tx_cons != vif->net_ring->tx_cons )
-        DPRINTK("Shadow and shared rings out of sync (%d/%d)\n",
-                vif->shadow_ring->tx_cons, vif->net_ring->tx_cons);
-
-    /*
-     * XXX This assumes that, per vif, SKBs are processed in-order!
-     * Also assumes no concurrency. This is safe because each vif
-     * maps to one NIC. This is executed in NIC interrupt code, so we have
-     * mutual exclusion from do_IRQ().
-     */
-
-    smp_wmb(); /* make sure any status updates occur before inc'ing tx_cons. */
-
-    /* Skip over a sequence of bad descriptors, plus the first good one. */
-    do {
-        idx = vif->shadow_ring->tx_cons;
-        /* There must be at least one good descriptor outstanding. */
-        if ( idx == vif->shadow_ring->tx_idx ) BUG();
-        tx  = &vif->shadow_ring->tx_ring[idx];
-        vif->shadow_ring->tx_cons = TX_RING_INC(idx);
-        if ( vif->shadow_ring->tx_cons == vif->net_ring->tx_event ) send = 1;
-    } while ( tx->status != RING_STATUS_OK );
-
-    /* Now skip over any more bad descriptors, up to the next good one. */
-    do {
-        idx = vif->shadow_ring->tx_cons;
-        tx  = &vif->shadow_ring->tx_ring[idx];
-        /* Carry on until we find a good descriptor, or reach scheduler idx. */
-        if ( (idx == vif->shadow_ring->tx_idx) || 
-             (tx->status == RING_STATUS_OK) )
-            break;
-        vif->shadow_ring->tx_cons = TX_RING_INC(idx);
-        if ( vif->shadow_ring->tx_cons == vif->net_ring->tx_event ) send = 1;
-    } while ( 1 );
-
-    /* Update shared consumer index to the new private value. */
-    vif->net_ring->tx_cons = vif->shadow_ring->tx_cons;
-
-    /* Send a transmit event if requested. */
-    if ( send )
-    {
-        cpu_mask = mark_guest_event(vif->domain, _EVENT_NET_TX);
-        guest_event_notify(cpu_mask);
-    }
+    vif->shadow_ring->tx_transmitted_prod =
+        TX_RING_INC(vif->shadow_ring->tx_transmitted_prod);
+    cpu_mask = mark_hyp_event(vif->domain, _HYP_EVENT_NET);
+    hyp_event_notify(cpu_mask);    
  
      put_vif(vif);
  }
@@ -765,27 +768,22 @@ static void net_tx_action(unsigned long unused)
              continue;
          }
  
-        /* Pick an entry from the transmit queue. */
-        tx = &vif->shadow_ring->tx_ring[vif->shadow_ring->tx_idx];
-        vif->shadow_ring->tx_idx = TX_RING_INC(vif->shadow_ring->tx_idx);
-        if ( vif->shadow_ring->tx_idx != vif->shadow_ring->tx_prod )
-            add_to_net_schedule_list_tail(vif);
-
-        /* Check the chosen entry is good. */
-        if ( tx->status != RING_STATUS_OK ) 
-        {
-            put_vif(vif);
-            continue;
-        }
-
          if ( (skb = alloc_skb_nodata(GFP_ATOMIC)) == NULL )
          {
              printk("Out of memory in net_tx_action()!\n");
-            tx->status = RING_STATUS_BAD_PAGE;
+            add_to_net_schedule_list_tail(vif);
              put_vif(vif);
              break;
          }
          
+        /* Pick an entry from the transmit queue. */
+        tx = &vif->shadow_ring->tx_ring[vif->shadow_ring->tx_idx];
+        vif->shadow_ring->tx_idx = TX_RING_INC(vif->shadow_ring->tx_idx);
+        if ( vif->shadow_ring->tx_idx != vif->shadow_ring->tx_prod )
+            add_to_net_schedule_list_tail(vif);
+
+        ASSERT(tx->status == RING_STATUS_OK);
+
          skb->destructor = tx_skb_release;
          
          skb->head = skb->data = tx->header;
@@ -827,57 +825,6 @@ static inline void maybe_schedule_tx_action(void)
  }
  
  
-/*
- * update_shared_ring(void)
- * 
- * This replaces flush_rx_queue as the guest event handler to move packets
- * queued in the guest ring up to the guest.  Really, the packet is already
- * there, it was page flipped in deliver_packet, but this moves the ring
- * descriptor across from the shadow ring and increments the pointers.
- */
-
-void update_shared_ring(void)
-{
-    rx_shadow_entry_t *rx;
-    shared_info_t *s = current->shared_info;
-    net_ring_t *net_ring;
-    net_shadow_ring_t *shadow_ring;
-    net_vif_t *vif;
-    struct list_head *ent;
-
-    clear_bit(_HYP_EVENT_NET_RX, &current->hyp_events);
-
-    list_for_each(ent, &current->net_vifs)
-    {
-        vif = list_entry(ent, net_vif_t, dom_list);
-        net_ring    = vif->net_ring;
-        shadow_ring = vif->shadow_ring;
-
-        /* This would mean that the guest OS has fiddled with our index. */
-        if ( shadow_ring->rx_cons != net_ring->rx_cons )
-            DPRINTK("Shadow and shared rings out of sync (%d/%d)\n",
-                    shadow_ring->rx_cons, net_ring->rx_cons);
-
-        while ( shadow_ring->rx_cons != shadow_ring->rx_idx )
-        {
-            rx = shadow_ring->rx_ring + shadow_ring->rx_cons;
-            copy_to_user(net_ring->rx_ring + shadow_ring->rx_cons, rx, 
-                         sizeof(rx_entry_t));
-
-            if ( rx->flush_count == tlb_flush_count[smp_processor_id()] )
-                __flush_tlb();
-
-            smp_wmb(); /* copy descriptor before inc'ing rx_cons */
-            shadow_ring->rx_cons = RX_RING_INC(shadow_ring->rx_cons);
-
-            if ( shadow_ring->rx_cons == net_ring->rx_event )
-                set_bit(_EVENT_NET_RX, &s->events);
-        }
-        net_ring->rx_cons = shadow_ring->rx_cons;
-    }
-}
-
-
  /*
   *     We need this ioctl for efficient implementation of the
   *     if_indextoname() function required by the IPv6 API.  Without
@@ -1847,10 +1794,10 @@ long do_net_update(void)
      net_ring_t *net_ring;
      net_shadow_ring_t *shadow_ring;
      net_vif_t *current_vif;
-    unsigned int i;
+    unsigned int i, j;
      struct sk_buff *skb;
-    tx_entry_t tx;
-    rx_shadow_entry_t *rx;
+    tx_req_entry_t tx;
+    rx_req_entry_t rx;
      unsigned long pfn;
      struct pfn_info *page;
      unsigned long *g_pte;    
@@ -1873,31 +1820,32 @@ long do_net_update(void)
           * new producer index, but take care not to catch up with our own
           * consumer index.
           */
-        for ( i = shadow_ring->tx_prod; 
-              (i != net_ring->tx_prod) && 
-                  (((shadow_ring->tx_cons-i) & (TX_RING_SIZE-1)) != 1); 
+        j = shadow_ring->tx_prod;
+        for ( i = shadow_ring->tx_req_cons; 
+              (i != net_ring->tx_req_prod) && 
+                  (((shadow_ring->tx_resp_prod-i) & (TX_RING_SIZE-1)) != 1); 
                i = TX_RING_INC(i) )
          {
-            if ( copy_from_user(&tx, net_ring->tx_ring+i, sizeof(tx)) )
+            if ( copy_from_user(&tx, &net_ring->tx_ring[i].req, sizeof(tx)) )
              {
                  DPRINTK("Bad copy_from_user for tx net descriptor\n");
-                shadow_ring->tx_ring[i].status = RING_STATUS_ERR_CFU;
+                make_tx_response(current_vif, tx.id, RING_STATUS_ERR_CFU);
                  continue;
              }
  
-            shadow_ring->tx_ring[i].size   = tx.size;
-            shadow_ring->tx_ring[i].status = RING_STATUS_BAD_PAGE;
-
-            if ( tx.size < PKT_PROT_LEN )
+            if ( (tx.size < PKT_PROT_LEN) || (tx.size > ETH_FRAME_LEN) )
              {
-                DPRINTK("Runt packet %d\n", tx.size);
+                DPRINTK("Bad packet size: %d\n", tx.size);
+                make_tx_response(current_vif, tx.id, RING_STATUS_BAD_PAGE);
                  continue; 
              }
  
+            /* No crossing a page boundary as the payload mustn't fragment. */
              if ( ((tx.addr & ~PAGE_MASK) + tx.size) >= PAGE_SIZE ) 
              {
                  DPRINTK("tx.addr: %lx, size: %u, end: %lu\n", 
                          tx.addr, tx.size, (tx.addr &~PAGE_MASK) + tx.size);
+                make_tx_response(current_vif, tx.id, RING_STATUS_BAD_PAGE);
                  continue;
              }
  
@@ -1909,6 +1857,7 @@ long do_net_update(void)
              {
                  DPRINTK("Bad page frame\n");
                  spin_unlock_irq(&current->page_lock);
+                make_tx_response(current_vif, tx.id, RING_STATUS_BAD_PAGE);
                  continue;
              }
              
@@ -1917,45 +1866,61 @@ long do_net_update(void)
              protocol = __constant_htons(
                  init_tx_header(g_data, tx.size, the_dev));
              if ( protocol == 0 )
+            {
+                make_tx_response(current_vif, tx.id, RING_STATUS_BAD_PAGE);
                  goto tx_unmap_and_continue;
+            }
  
              target = __net_get_target_vif(g_data, tx.size, current_vif->id);
  
              if ( target > VIF_PHYSICAL_INTERFACE )
              {
                  /* Local delivery */
-                if ( (skb = dev_alloc_skb(tx.size)) == NULL ) 
+                if ( (skb = dev_alloc_skb(ETH_FRAME_LEN + 32)) == NULL )
+                {
+                    make_tx_response(current_vif, tx.id, RING_STATUS_BAD_PAGE);
                      goto tx_unmap_and_continue;
-                
-                skb->destructor = tx_skb_release;
-                get_vif(current_vif);
-
-                shadow_ring->tx_ring[i].status = RING_STATUS_OK;
+                }
  
                  skb->src_vif = current_vif->id;
                  skb->dst_vif = target;
-                skb->protocol = protocol;
-                
+                skb->protocol = protocol;                
+
+                /*
+                 * We don't need a well-formed skb as netif_rx will fill these
+                 * fields in as necessary. All we actually need is the right
+                 * page offset in skb->data, and the right length in skb->len.
+                 * Note that the correct address/length *excludes* link header.
+                 */
                  skb->head = (u8 *)map_domain_mem(
                      ((skb->pf - frame_table) << PAGE_SHIFT));
-                skb->data = skb->head + 16;
-                skb_reserve(skb,2);
+                skb->data = skb->head + 18;
                  memcpy(skb->data, g_data, tx.size);
-                skb->len = tx.size;
-                unmap_domain_mem(skb->head);
                  skb->data += ETH_HLEN;
+                skb->len = tx.size - ETH_HLEN;
+                unmap_domain_mem(skb->head);
+
                  (void)netif_rx(skb);
+
+                make_tx_response(current_vif, tx.id, RING_STATUS_OK);
              }
              else if ( target == VIF_PHYSICAL_INTERFACE )
              {
-                shadow_ring->tx_ring[i].header = 
+                shadow_ring->tx_ring[j].id     = tx.id;
+                shadow_ring->tx_ring[j].size   = tx.size;
+                shadow_ring->tx_ring[j].status = RING_STATUS_OK;
+                shadow_ring->tx_ring[j].header = 
                      kmem_cache_alloc(net_header_cachep, GFP_KERNEL);
-                if ( shadow_ring->tx_ring[i].header == NULL ) 
+                if ( shadow_ring->tx_ring[j].header == NULL )
+                { 
+                    make_tx_response(current_vif, tx.id, RING_STATUS_OK);
                      goto tx_unmap_and_continue;
-                memcpy(shadow_ring->tx_ring[i].header, g_data, PKT_PROT_LEN);
-                shadow_ring->tx_ring[i].payload = tx.addr + PKT_PROT_LEN;
-                shadow_ring->tx_ring[i].status = RING_STATUS_OK;
+                }
+
+                memcpy(shadow_ring->tx_ring[j].header, g_data, PKT_PROT_LEN);
+                shadow_ring->tx_ring[j].payload = tx.addr + PKT_PROT_LEN;
                  get_page_tot(page);
+                j = TX_RING_INC(j);
              }
  
          tx_unmap_and_continue:
@@ -1963,10 +1928,12 @@ long do_net_update(void)
              spin_unlock_irq(&current->page_lock);
          }
  
-        if ( shadow_ring->tx_prod != i )
+        shadow_ring->tx_req_cons = i;
+
+        if ( shadow_ring->tx_prod != j )
          {
              smp_mb(); /* Let other CPUs see new descriptors first. */
-            shadow_ring->tx_prod = i;
+            shadow_ring->tx_prod = j;
              add_to_net_schedule_list_tail(current_vif);
              maybe_schedule_tx_action();
          }
@@ -1980,29 +1947,23 @@ long do_net_update(void)
           * new producer index, but take care not to catch up with our own
           * consumer index.
           */
-        for ( i = shadow_ring->rx_prod; 
-              (i != net_ring->rx_prod) && 
-                  (((shadow_ring->rx_cons-i) & (RX_RING_SIZE-1)) != 1); 
+        j = shadow_ring->rx_prod;
+        for ( i = shadow_ring->rx_req_cons; 
+              (i != net_ring->rx_req_prod) && 
+                  (((shadow_ring->rx_resp_prod-i) & (RX_RING_SIZE-1)) != 1); 
                i = RX_RING_INC(i) )
          {
-            /* 
-             * This copy assumes that rx_shadow_entry_t is an extension of 
-             * rx_net_entry_t extra fields must be tacked on to the end.
-             */
-            if ( copy_from_user(shadow_ring->rx_ring+i, net_ring->rx_ring+i, 
-                                sizeof (rx_entry_t) ) )
+            if ( copy_from_user(&rx, &net_ring->rx_ring[i].req, sizeof(rx)) )
              {
-                DPRINTK("Bad copy_from_user for rx ring\n");
-                shadow_ring->rx_ring[i].status = RING_STATUS_ERR_CFU;
+                DPRINTK("Bad copy_from_user for rx net descriptor\n");
+                make_rx_response(current_vif, 
+                                 rx.id, 0, RING_STATUS_ERR_CFU, 0);
                  continue;
-            } 
+            }
  
-            rx = shadow_ring->rx_ring + i;
-            pfn = rx->addr >> PAGE_SHIFT;
+            pfn = rx.addr >> PAGE_SHIFT;
              page = frame_table + pfn;
              
-            shadow_ring->rx_ring[i].status = RING_STATUS_BAD_PAGE;
-            
              spin_lock_irq(&current->page_lock);
              if ( (pfn >= max_page) || 
                   (page->flags != (PGT_l1_page_table | current->domain)) ) 
@@ -2010,14 +1971,18 @@ long do_net_update(void)
                  DPRINTK("Bad page frame for ppte %d,%08lx,%08lx,%08lx\n",
                          current->domain, pfn, max_page, page->flags);
                  spin_unlock_irq(&current->page_lock);
+                make_rx_response(current_vif, 
+                                 rx.id, 0, RING_STATUS_BAD_PAGE, 0);
                  continue;
              }
              
-            g_pte = map_domain_mem(rx->addr);
+            g_pte = map_domain_mem(rx.addr);
              
              if ( !(*g_pte & _PAGE_PRESENT) )
              {
-                DPRINTK("Inavlid PTE passed down (not present)\n");
+                DPRINTK("Invalid PTE passed down (not present)\n");
+                make_rx_response(current_vif, 
+                                 rx.id, 0, RING_STATUS_BAD_PAGE, 0);
                  goto rx_unmap_and_continue;
              }
              
@@ -2027,25 +1992,32 @@ long do_net_update(void)
              {
                 DPRINTK("RX page mapped multple times (%d/%d/%08x)\n",
                 page->type_count, page->tot_count, page->flags);
-                
+                make_rx_response(current_vif, 
+                                 rx.id, 0, RING_STATUS_BAD_PAGE, 0);
                  goto rx_unmap_and_continue;
              }
              
              /* The pte they passed was good, so take it away from them. */
-            shadow_ring->rx_ring[i].status = RING_STATUS_OK;
              *g_pte &= ~_PAGE_PRESENT;
              page->flags = (page->flags & ~PG_type_mask) | PGT_net_rx_buf;
-            rx->flush_count = tlb_flush_count[smp_processor_id()];
+            shadow_ring->rx_ring[j].id          = rx.id;
+            shadow_ring->rx_ring[j].addr        = rx.addr;
+            shadow_ring->rx_ring[j].status      = RING_STATUS_OK;
+            shadow_ring->rx_ring[j].flush_count = 
+                tlb_flush_count[smp_processor_id()];
+            j = RX_RING_INC(j);
              
          rx_unmap_and_continue:
              unmap_domain_mem(g_pte);
              spin_unlock_irq(&current->page_lock);
          }
  
-        if ( shadow_ring->rx_prod != i )
+        shadow_ring->rx_req_cons = i;
+
+        if ( shadow_ring->rx_prod != j )
          {
              smp_mb(); /* Let other CPUs see new descriptors first. */
-            shadow_ring->rx_prod = i;
+            shadow_ring->rx_prod = j;
          }
      }
  
@@ -2053,6 +2025,58 @@ long do_net_update(void)
  }
  
  
+/* Post a TX response (id, st) on vif's shared transmit ring and flag
+ * _EVENT_NET_TX once the producer index reaches the ring's tx_event. */
+static void make_tx_response(net_vif_t *vif, unsigned long id,
+                             unsigned char st)
+{
+    unsigned long flags;
+    net_shadow_ring_t *shadow = vif->shadow_ring;
+    unsigned int pos;
+    tx_resp_entry_t *resp, privresp;
+
+    local_irq_save(flags);
+    pos  = shadow->tx_resp_prod;
+    resp = &vif->net_ring->tx_ring[pos].resp;
+    privresp.id     = id;
+    privresp.status = st;
+    copy_to_user(resp, &privresp, sizeof(privresp));
+    pos = TX_RING_INC(pos);
+    shadow->tx_resp_prod = vif->net_ring->tx_resp_prod = pos;
+    if ( pos == vif->net_ring->tx_event ) /* TX ring's own event index */
+        set_bit(_EVENT_NET_TX, &current->shared_info->events);
+    local_irq_restore(flags);
+}
+
+
+/* Post an RX response (id, size, st, off) on vif's shared receive ring and
+ * flag _EVENT_NET_RX if the producer index now equals the ring's rx_event. */
+static void make_rx_response(net_vif_t *vif, unsigned long id,
+                             unsigned short size, unsigned char st,
+                             unsigned char off)
+{
+    net_shadow_ring_t *shadow = vif->shadow_ring;
+    rx_resp_entry_t privresp, *resp;
+    unsigned long flags;
+    unsigned int idx;
+
+    /* Interrupts stay off while the slot is filled and indices advance. */
+    local_irq_save(flags);
+    idx  = shadow->rx_resp_prod;
+    resp = &vif->net_ring->rx_ring[idx].resp;
+    privresp.offset = off;
+    privresp.status = st;
+    privresp.size   = size;
+    privresp.id     = id;
+    copy_to_user(resp, &privresp, sizeof(privresp));
+    idx = RX_RING_INC(idx);
+    shadow->rx_resp_prod = vif->net_ring->rx_resp_prod = idx;
+    if ( idx == vif->net_ring->rx_event )
+        set_bit(_EVENT_NET_RX, &current->shared_info->events);
+    local_irq_restore(flags);
+}
+
+
  int setup_network_devices(void)
  {
      int ret;
diff --git a/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/network/network.c b/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/network/network.c

index 01e81e0cf02827abc837c9c42b86f2bed8342a8a..3ae5e3d1a0d9a588d450bee45b829a45fcaf2454 100644 (file)
--- a/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/network/network.c
+++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/network/network.c
@@ -3,7 +3,7 @@
   * 
   * Virtual network driver for XenoLinux.
   * 
- * Copyright (c) 2002, K A Fraser
+ * Copyright (c) 2002-2003, K A Fraser
   */
  
  #include <linux/config.h>
@@ -47,21 +47,14 @@ static void cleanup_module(void);
  
  static struct list_head dev_list;
  
-/*
- * RX RING:   RX_IDX <= rx_cons <= rx_prod
- * TX RING:   TX_IDX <= tx_cons <= tx_prod
- * (*_IDX allocated privately here, *_cons & *_prod shared with hypervisor)
- */
  struct net_private
  {
      struct list_head list;
      struct net_device *dev;
  
      struct net_device_stats stats;
-    struct sk_buff **tx_skb_ring;
-    struct sk_buff **rx_skb_ring;
      atomic_t tx_entries;
-    unsigned int rx_idx, tx_idx, tx_full;
+    unsigned int rx_resp_cons, tx_resp_cons, tx_full;
      net_ring_t *net_ring;
      spinlock_t tx_lock;
  };
@@ -71,10 +64,10 @@ static void dbg_network_int(int irq, void *dev_id, struct pt_regs *ptregs)
  {
      struct net_device *dev = (struct net_device *)dev_id;
      struct net_private *np = dev->priv;
-    printk(KERN_ALERT "tx_full = %d, tx_entries = %d, tx_idx = %d,"
-           " tx_cons = %d, tx_prod = %d, tx_event = %d, state=%d\n",
-           np->tx_full, atomic_read(&np->tx_entries), np->tx_idx, 
-           np->net_ring->tx_cons, np->net_ring->tx_prod, 
+    printk(KERN_ALERT "tx_full = %d, tx_entries = %d, tx_resp_cons = %d,"
+           " tx_req_prod = %d, tx_resp_prod = %d, tx_event = %d, state=%d\n",
+           np->tx_full, atomic_read(&np->tx_entries), np->tx_resp_cons, 
+           np->net_ring->tx_req_prod, np->net_ring->tx_resp_prod, 
             np->net_ring->tx_event,
             test_bit(__LINK_STATE_XOFF, &dev->state));
  }
@@ -85,29 +78,17 @@ static int network_open(struct net_device *dev)
      struct net_private *np = dev->priv;
      int error = 0;
  
-    np->rx_idx = np->tx_idx = np->tx_full = 0;
-
+    np->rx_resp_cons = np->tx_resp_cons = np->tx_full = 0;
      memset(&np->stats, 0, sizeof(np->stats));
-
      spin_lock_init(&np->tx_lock);
-
      atomic_set(&np->tx_entries, 0);
+    memset(np->net_ring, 0, sizeof(*np->net_ring));
  
-    np->net_ring->tx_prod = np->net_ring->tx_cons = np->net_ring->tx_event = 0;
-    np->net_ring->rx_prod = np->net_ring->rx_cons = np->net_ring->rx_event = 0;
-    np->net_ring->tx_ring = NULL;
-    np->net_ring->rx_ring = NULL;
-
-    np->tx_skb_ring = kmalloc(TX_RING_SIZE * sizeof(struct sk_buff *),
-                              GFP_KERNEL);
-    np->rx_skb_ring = kmalloc(RX_RING_SIZE * sizeof(struct sk_buff *),
-                              GFP_KERNEL);
      np->net_ring->tx_ring = kmalloc(TX_RING_SIZE * sizeof(tx_entry_t), 
                                      GFP_KERNEL);
      np->net_ring->rx_ring = kmalloc(RX_RING_SIZE * sizeof(rx_entry_t), 
                                      GFP_KERNEL);
-    if ( (np->tx_skb_ring == NULL) || (np->rx_skb_ring == NULL) ||
-         (np->net_ring->tx_ring == NULL) || (np->net_ring->rx_ring == NULL) )
+    if ( (np->net_ring->tx_ring == NULL) || (np->net_ring->rx_ring == NULL) )
      {
          printk(KERN_WARNING "%s; Could not allocate ring memory\n", dev->name);
          error = -ENOBUFS;
@@ -156,8 +137,6 @@ static int network_open(struct net_device *dev)
   fail:
      if ( np->net_ring->rx_ring ) kfree(np->net_ring->rx_ring);
      if ( np->net_ring->tx_ring ) kfree(np->net_ring->tx_ring);
-    if ( np->rx_skb_ring ) kfree(np->rx_skb_ring);
-    if ( np->tx_skb_ring ) kfree(np->tx_skb_ring);
      kfree(np);
      return error;
  }
@@ -169,28 +148,29 @@ static void network_tx_buf_gc(struct net_device *dev)
      struct net_private *np = dev->priv;
      struct sk_buff *skb;
      unsigned long flags;
-    unsigned int cons;
+    unsigned int prod;
+    tx_entry_t *tx_ring = np->net_ring->tx_ring;
  
      spin_lock_irqsave(&np->tx_lock, flags);
  
      do {
-        cons = np->net_ring->tx_cons;
+        prod = np->net_ring->tx_resp_prod;
  
-        for ( i = np->tx_idx; i != cons; i = TX_RING_INC(i) )
+        for ( i = np->tx_resp_cons; i != prod; i = TX_RING_INC(i) )
          {
-            skb = np->tx_skb_ring[i];
+            skb = (struct sk_buff *)tx_ring[i].resp.id;
              dev_kfree_skb_any(skb);
              atomic_dec(&np->tx_entries);
          }
          
-        np->tx_idx = i;
+        np->tx_resp_cons = prod;
          
          /* Set a new event, then check for race with update of tx_cons. */
          np->net_ring->tx_event =
-            TX_RING_ADD(cons, (atomic_read(&np->tx_entries)>>1) + 1);
+            TX_RING_ADD(prod, (atomic_read(&np->tx_entries)>>1) + 1);
          smp_mb();
      }
-    while ( cons != np->net_ring->tx_cons );
+    while ( prod != np->net_ring->tx_resp_prod );
  
      if ( np->tx_full && (atomic_read(&np->tx_entries) < TX_MAX_ENTRIES) )
      {
@@ -201,21 +181,13 @@ static void network_tx_buf_gc(struct net_device *dev)
      spin_unlock_irqrestore(&np->tx_lock, flags);
  }
  
-inline unsigned long get_ppte(unsigned long addr)
+inline pte_t *get_ppte(void *addr)
  {
-    unsigned long ppte;
-    pgd_t *pgd; pmd_t *pmd; pte_t *ptep;
-    pgd = pgd_offset_k(addr);
-
-    if ( pgd_none(*pgd) || pgd_bad(*pgd) ) BUG();
-        
-    pmd = pmd_offset(pgd, addr);
-    if ( pmd_none(*pmd) || pmd_bad(*pmd) ) BUG(); 
-        
-    ptep = pte_offset(pmd, addr);
-    ppte = (unsigned long)phys_to_machine(virt_to_phys(ptep));
-
-    return ppte;
+    pgd_t *pgd; pmd_t *pmd; pte_t *pte;
+    pgd = pgd_offset_k(   (unsigned long)addr);
+    pmd = pmd_offset(pgd, (unsigned long)addr);
+    pte = pte_offset(pmd, (unsigned long)addr);
+    return pte;
  }
  
  static void network_alloc_rx_buffers(struct net_device *dev)
@@ -223,21 +195,21 @@ static void network_alloc_rx_buffers(struct net_device *dev)
      unsigned int i;
      struct net_private *np = dev->priv;
      struct sk_buff *skb;
-    unsigned int end = RX_RING_ADD(np->rx_idx, RX_MAX_ENTRIES);    
+    unsigned int end = RX_RING_ADD(np->rx_resp_cons, RX_MAX_ENTRIES);    
  
-    for ( i = np->net_ring->rx_prod; i != end; i = RX_RING_INC(i) )
+    for ( i = np->net_ring->rx_req_prod; i != end; i = RX_RING_INC(i) )
      {
          skb = dev_alloc_skb(RX_BUF_SIZE);
          if ( skb == NULL ) break;
          skb->dev = dev;
-        np->rx_skb_ring[i] = skb;
-        np->net_ring->rx_ring[i].addr = get_ppte((unsigned long)skb->head); 
-        np->net_ring->rx_ring[i].size = RX_BUF_SIZE - 16; /* arbitrary */
+        np->net_ring->rx_ring[i].req.id   = (unsigned long)skb;
+        np->net_ring->rx_ring[i].req.addr = 
+            virt_to_machine(get_ppte(skb->head));
      }
  
-    np->net_ring->rx_prod = i;
+    np->net_ring->rx_req_prod = i;
  
-    np->net_ring->rx_event = RX_RING_INC(np->rx_idx);
+    np->net_ring->rx_event = RX_RING_INC(np->rx_resp_cons);
  
      /*
       * We may have allocated buffers which have entries outstanding in
@@ -254,9 +226,11 @@ static void network_free_rx_buffers(struct net_device *dev)
      struct net_private *np = dev->priv;
      struct sk_buff *skb;    
  
-    for ( i = np->rx_idx; i != np->net_ring->rx_prod; i = RX_RING_INC(i) )
+    for ( i  = np->rx_resp_cons; 
+          i != np->net_ring->rx_req_prod; 
+          i  = RX_RING_INC(i) )
      {
-        skb = np->rx_skb_ring[i];
+        skb = (struct sk_buff *)np->net_ring->rx_ring[i].req.id;
          dev_kfree_skb_any(skb);
      }
  }
@@ -272,7 +246,7 @@ static int network_start_xmit(struct sk_buff *skb, struct net_device *dev)
          netif_stop_queue(dev);
          return -ENOBUFS;
      }
-    i = np->net_ring->tx_prod;
+    i = np->net_ring->tx_req_prod;
  
      if ( (((unsigned long)skb->data & ~PAGE_MASK) + skb->len) >= PAGE_SIZE )
      {
@@ -284,11 +258,11 @@ static int network_start_xmit(struct sk_buff *skb, struct net_device *dev)
          skb = new_skb;
      }   
      
-    np->tx_skb_ring[i] = skb;
-    np->net_ring->tx_ring[i].addr =
-        (unsigned long)phys_to_machine(virt_to_phys(skb->data));
-    np->net_ring->tx_ring[i].size = skb->len;
-    np->net_ring->tx_prod = TX_RING_INC(i);
+    np->net_ring->tx_ring[i].req.id   = (unsigned long)skb;
+    np->net_ring->tx_ring[i].req.addr =
+        phys_to_machine(virt_to_phys(skb->data));
+    np->net_ring->tx_ring[i].req.size = skb->len;
+    np->net_ring->tx_req_prod = TX_RING_INC(i);
      atomic_inc(&np->tx_entries);
  
      np->stats.tx_bytes += skb->len;
@@ -316,13 +290,15 @@ static void network_rx_int(int irq, void *dev_id, struct pt_regs *ptregs)
      struct net_device *dev = (struct net_device *)dev_id;
      struct net_private *np = dev->priv;
      struct sk_buff *skb;
-    rx_entry_t *rx;
+    rx_resp_entry_t *rx;
      
   again:
-    for ( i = np->rx_idx; i != np->net_ring->rx_cons; i = RX_RING_INC(i) )
+    for ( i  = np->rx_resp_cons; 
+          i != np->net_ring->rx_resp_prod; 
+          i  = RX_RING_INC(i) )
      {
-        rx  = &np->net_ring->rx_ring[i];
-        skb = np->rx_skb_ring[i];
+        rx  = &np->net_ring->rx_ring[i].resp;
+        skb = (struct sk_buff *)rx->id;
  
          if ( rx->status != RING_STATUS_OK )
          {
@@ -341,8 +317,7 @@ static void network_rx_int(int irq, void *dev_id, struct pt_regs *ptregs)
          skb_shinfo(skb)->frag_list = NULL;
                                  
          phys_to_machine_mapping[virt_to_phys(skb->head) >> PAGE_SHIFT] =
-            (*(unsigned long *)phys_to_virt(machine_to_phys(rx->addr))
-                ) >> PAGE_SHIFT;
+            (*(unsigned long *)get_ppte(skb->head)) >> PAGE_SHIFT;
  
          if ( rx->offset < 16 )
          {
@@ -353,23 +328,23 @@ static void network_rx_int(int irq, void *dev_id, struct pt_regs *ptregs)
          
          skb_reserve(skb, rx->offset - 16);
  
-        skb_put(skb, np->net_ring->rx_ring[i].size);
+        skb_put(skb, rx->size);
          skb->protocol = eth_type_trans(skb, dev);
  
          np->stats.rx_packets++;
  
-        np->stats.rx_bytes += np->net_ring->rx_ring[i].size;
+        np->stats.rx_bytes += rx->size;
          netif_rx(skb);
          dev->last_rx = jiffies;
      }
  
-    np->rx_idx = i;
+    np->rx_resp_cons = i;
  
      network_alloc_rx_buffers(dev);
      
      /* Deal with hypervisor racing our resetting of rx_event. */
      smp_mb();
-    if ( np->net_ring->rx_cons != i ) goto again;
+    if ( np->net_ring->rx_resp_prod != i ) goto again;
  }
  
  
@@ -382,8 +357,6 @@ static void network_tx_int(int irq, void *dev_id, struct pt_regs *ptregs)
  
  int network_close(struct net_device *dev)
  {
-    struct net_private *np = dev->priv;
-
      netif_stop_queue(dev);
  
      free_irq(NET_RX_IRQ, dev);
@@ -401,9 +374,6 @@ int network_close(struct net_device *dev)
      kfree(np->net_ring->tx_ring);
  #endif
  
-    kfree(np->rx_skb_ring);
-    kfree(np->tx_skb_ring);
-
      MOD_DEC_USE_COUNT;
  
      return 0;
author	kaf24@scramble.cl.cam.ac.uk <kaf24@scramble.cl.cam.ac.uk>
	Thu, 17 Apr 2003 17:12:21 +0000 (17:12 +0000)
committer	kaf24@scramble.cl.cam.ac.uk <kaf24@scramble.cl.cam.ac.uk>
	Thu, 17 Apr 2003 17:12:21 +0000 (17:12 +0000)
xen/TODO		patch \| blob \| history
xen/common/network.c		patch \| blob \| history
xen/include/hypervisor-ifs/network.h		patch \| blob \| history
xen/include/xeno/sched.h		patch \| blob \| history
xen/include/xeno/vif.h		patch \| blob \| history
xen/net/dev.c		patch \| blob \| history
xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/network/network.c		patch \| blob \| history